Ecoli

# Load the Ecoli table; the code below uses its `mcg`, `gvh` and `class` columns.
ecoli_labeled <- read.table(
  "C:/Users/lucii/OneDrive/Documentos/Universidad/quinto/Erasmus_practicas/Rproject/cluster/data/ecoli_with_clust_labels.txt",
  header = TRUE
)
library(tidyverse)

# Scatter plot of `mcg` against `gvh`, coloured by `class`.
# The axis labels and the title now name what is actually plotted; the
# previous "Value of measure" / "k" / "Insect" texts were copy-paste leftovers.
p <- ecoli_labeled %>%
  ggplot(aes(x = mcg, y = gvh, group = class)) +
  geom_point(aes(color = class)) +
  labs(x = "mcg", y = "gvh") +
  guides(size = "none") +
  theme(
    text = element_text(size = 10, family = "LM Roman 10"),
    axis.title.x = element_text(face = "italic")
  ) +
  ggtitle("Ecoli")

# Display the plot with the subtitle/caption vertical justification adjusted.
p + theme(
  plot.subtitle = element_text(vjust = 1),
  plot.caption = element_text(vjust = 1)
)

#+labs(subtitle = "")
# PCA on every Ecoli column except `class`, with centring and unit-variance
# scaling; the component summary is printed right after.
pca_res <- prcomp(
  select(ecoli_labeled, -class),
  center = TRUE,
  scale. = TRUE
)
summary(pca_res)
## Importance of components:
##                           PC1    PC2    PC3    PC4     PC5     PC6     PC7
## Standard deviation     1.4851 1.2088 1.0961 0.9258 0.81819 0.69185 0.35556
## Proportion of Variance 0.3151 0.2087 0.1716 0.1225 0.09563 0.06838 0.01806
## Cumulative Proportion  0.3151 0.5238 0.6955 0.8179 0.91356 0.98194 1.00000
library(ggbiplot)

# Biplot with ggbiplot's default pair of components, grouped by class with
# an ellipse drawn per group.
ggbiplot(
  pca_res,
  ellipse = TRUE,
  groups = ecoli_labeled$class
)

# Same biplot for the third and fourth principal components.
ggbiplot(
  pca_res,
  choices = c(3, 4),
  ellipse = TRUE,
  groups = ecoli_labeled$class
)

Arrhythmia

# Load the Arrhythmia table from disk (first line is used as the header).
arrhytmia_labeled = read.table("C:/Users/lucii/OneDrive/Documentos/Universidad/quinto/Erasmus_practicas/Rproject/cluster/data/arrhythmia_with_clust_labels.txt",header = TRUE )
library(tidyverse)
# NOTE(review): this scatter plot reads `ecoli_labeled`, not
# `arrhytmia_labeled`, and is titled "Insect" with axis labels
# "Value of measure" / "k". It looks like a copy-paste of another chunk;
# confirm which arrhythmia columns (if any) should be plotted here.
p = ecoli_labeled%>%
  ggplot(aes(x = mcg, y =   gvh  , group = class))+
  geom_point(aes(color=class))+


  labs(y= "Value of measure", x ='k')+
    guides(size = "none")+
theme(text         = element_text(size=10, family="LM Roman 10"), axis.title.x=element_text(face="italic")) +
  ggtitle("Insect")
# Display the plot with the subtitle/caption vertical justification adjusted.
p + theme(plot.subtitle = element_text(vjust = 1), 
    plot.caption = element_text(vjust = 1)) 

#+labs(subtitle = "")
# PCA on every Arrhythmia column except `Class`, centred and scaled.
pca_res <- prcomp(
  select(arrhytmia_labeled, -Class),
  center = TRUE,
  scale. = TRUE
)
# summary(pca_res)
library(ggbiplot)

# Biplot with ggbiplot's default pair of components; `Class` is converted to
# a factor so it is used as a discrete grouping.
ggbiplot(
  pca_res,
  ellipse = TRUE,
  groups = as.factor(arrhytmia_labeled$Class)
)

# Same biplot for the third and fourth principal components.
ggbiplot(
  pca_res,
  choices = c(3, 4),
  ellipse = TRUE,
  groups = as.factor(arrhytmia_labeled$Class)
)

3-spiral

# Load the 3-spiral table from disk (first line is used as the header).
espiral_labeled <- read.table(
  file = "C:/Users/lucii/OneDrive/Documentos/Universidad/quinto/Erasmus_practicas/Rproject/cluster/data/3-spiral_with_clust_labels.txt",
  header = TRUE
)

This dataset has only 2 variables, so there is no need for PCA.

# Scatter plot of the 3-spiral points (`x` vs `y`), coloured by `class`
# treated as a factor.
p <- ggplot(
  espiral_labeled,
  aes(x = x, y = y, group = as.factor(class))
) +
  geom_point(aes(color = as.factor(class))) +
  labs(x = "x", y = "y") +
  guides(size = "none") +
  theme(
    text = element_text(size = 10, family = "LM Roman 10"),
    axis.title.x = element_text(face = "italic")
  ) +
  ggtitle("3-spiral")

# Display the plot with the subtitle/caption vertical justification adjusted.
p + theme(
  plot.subtitle = element_text(vjust = 1),
  plot.caption = element_text(vjust = 1)
)

#+labs(subtitle = "")

3MC

# Load the 3MC table from disk (first line is used as the header).
MC3_labeled <- read.table(
  "C:/Users/lucii/OneDrive/Documentos/Universidad/quinto/Erasmus_practicas/Rproject/cluster/data/3MC_with_clust_labels.txt",
  header = TRUE
)

# Scatter plot of the 3MC points (`x` vs `y`), coloured by `class` treated
# as a factor.
p <- ggplot(
  MC3_labeled,
  aes(x = x, y = y, group = as.factor(class))
) +
  geom_point(aes(color = as.factor(class))) +
  labs(x = "x", y = "y") +
  guides(size = "none") +
  theme(
    text = element_text(size = 10, family = "LM Roman 10"),
    axis.title.x = element_text(face = "italic")
  ) +
  ggtitle("3MC")

# Display the plot with the subtitle/caption vertical justification adjusted.
p + theme(
  plot.subtitle = element_text(vjust = 1),
  plot.caption = element_text(vjust = 1)
)

#+labs(subtitle = "")

Flame

# Load the Flame table from disk (first line is used as the header).
flame_labeled <- read.table(
  "C:/Users/lucii/OneDrive/Documentos/Universidad/quinto/Erasmus_practicas/Rproject/cluster/data/flame_with_clust_labels.txt",
  header = TRUE
)

# Scatter plot of the Flame points (`x` vs `y`), coloured by `class` treated
# as a factor.
p <- ggplot(
  flame_labeled,
  aes(x = x, y = y, group = as.factor(class))
) +
  geom_point(aes(color = as.factor(class))) +
  labs(x = "x", y = "y") +
  guides(size = "none") +
  theme(
    text = element_text(size = 10, family = "LM Roman 10"),
    axis.title.x = element_text(face = "italic")
  ) +
  ggtitle("Flame")

# Display the plot with the subtitle/caption vertical justification adjusted.
p + theme(
  plot.subtitle = element_text(vjust = 1),
  plot.caption = element_text(vjust = 1)
)

#+labs(subtitle = "")

Insect

# Load the Insect table; the plot below uses its `attr1`, `attr2`, `attr3`
# and `class` columns.
insect_labeled <- read.table(
  "C:/Users/lucii/OneDrive/Documentos/Universidad/quinto/Erasmus_practicas/Rproject/cluster/data/insect_with_clust_labels.txt",
  header = TRUE
)
library(plotly)

# Interactive 3D scatter of the three attributes, coloured by `class`.
# For scatter3d traces the axis titles have to be nested under
# `layout(scene = ...)`; top-level `xaxis`/`yaxis` settings are not applied
# to 3D axes. The z axis is labelled as well.
plot_ly(
  x = insect_labeled$attr1,
  y = insect_labeled$attr2,
  z = insect_labeled$attr3,
  type = "scatter3d",
  mode = "markers",
  color = insect_labeled$class
) %>%
  layout(
    title = "Insect",
    scene = list(
      xaxis = list(title = "attr1"),
      yaxis = list(title = "attr2"),
      zaxis = list(title = "attr3")
    ),
    legend = list(title = list(text = "<b> Class </b>"))
  )

Lsun

# Load the Lsun points together with their `class` labels.
lsun_labeled <- read.table(
  "C:/Users/lucii/OneDrive/Documentos/Universidad/quinto/Erasmus_practicas/Rproject/cluster/data/lsun_with_clust_labels.txt",
  header = TRUE
)

# Load the clustering results stored in lsun_exact_clustering.txt.
lsun_clusters <- read.table(
  "C:/Users/lucii/OneDrive/Documentos/Universidad/quinto/Erasmus_practicas/Rproject/cluster/data/lsun_exact_clustering.txt",
  header = TRUE
)

# Scatter plot of the Lsun points (`x` vs `y`), coloured by `class` treated
# as a factor.
p <- ggplot(
  lsun_labeled,
  aes(x = x, y = y, group = as.factor(class))
) +
  geom_point(aes(color = as.factor(class))) +
  labs(x = "x", y = "y") +
  guides(size = "none") +
  theme(
    text = element_text(size = 10, family = "LM Roman 10"),
    axis.title.x = element_text(face = "italic")
  ) +
  ggtitle("Lsun")

# Display the plot with the subtitle/caption vertical justification adjusted.
p + theme(
  plot.subtitle = element_text(vjust = 1),
  plot.caption = element_text(vjust = 1)
)

#+labs(subtitle = "")

Plots for our clusters

Plot for k = 2

# Lsun points coloured by the first column of `lsun_clusters`
# (the plot is titled k = 2).
p <- ggplot(
  lsun_labeled,
  aes(x = x, y = y, group = as.factor(lsun_clusters[, 1]))
) +
  geom_point(aes(color = as.factor(lsun_clusters[, 1]))) +
  labs(x = "x", y = "y") +
  guides(size = "none") +
  theme(
    text = element_text(size = 10, family = "LM Roman 10"),
    axis.title.x = element_text(face = "italic")
  ) +
  ggtitle("Lsun, k = 2")

# Display the plot with the subtitle/caption vertical justification adjusted.
p + theme(
  plot.subtitle = element_text(vjust = 1),
  plot.caption = element_text(vjust = 1)
)

#+labs(subtitle = "")

Plot for k = 3

# Lsun points coloured by the second column of `lsun_clusters`
# (the plot is titled k = 3).
p <- ggplot(
  lsun_labeled,
  aes(x = x, y = y, group = as.factor(lsun_clusters[, 2]))
) +
  geom_point(aes(color = as.factor(lsun_clusters[, 2]))) +
  labs(x = "x", y = "y") +
  guides(size = "none") +
  theme(
    text = element_text(size = 10, family = "LM Roman 10"),
    axis.title.x = element_text(face = "italic")
  ) +
  ggtitle("Lsun, k = 3")

# Display the plot with the subtitle/caption vertical justification adjusted.
p + theme(
  plot.subtitle = element_text(vjust = 1),
  plot.caption = element_text(vjust = 1)
)

#+labs(subtitle = "")

Plot for k = 4

# Lsun points coloured by the third column of `lsun_clusters`
# (the plot is titled k = 4).
p <- ggplot(
  lsun_labeled,
  aes(x = x, y = y, group = as.factor(lsun_clusters[, 3]))
) +
  geom_point(aes(color = as.factor(lsun_clusters[, 3]))) +
  labs(x = "x", y = "y") +
  guides(size = "none") +
  theme(
    text = element_text(size = 10, family = "LM Roman 10"),
    axis.title.x = element_text(face = "italic")
  ) +
  ggtitle("Lsun, k = 4")

# Display the plot with the subtitle/caption vertical justification adjusted.
p + theme(
  plot.subtitle = element_text(vjust = 1),
  plot.caption = element_text(vjust = 1)
)

#+labs(subtitle = "")

Plot for k = 5

# Lsun points coloured by the fourth column of `lsun_clusters`
# (the plot is titled k = 5).
p <- ggplot(
  lsun_labeled,
  aes(x = x, y = y, group = as.factor(lsun_clusters[, 4]))
) +
  geom_point(aes(color = as.factor(lsun_clusters[, 4]))) +
  labs(x = "x", y = "y") +
  guides(size = "none") +
  theme(
    text = element_text(size = 10, family = "LM Roman 10"),
    axis.title.x = element_text(face = "italic")
  ) +
  ggtitle("Lsun, k = 5")

# Display the plot with the subtitle/caption vertical justification adjusted.
p + theme(
  plot.subtitle = element_text(vjust = 1),
  plot.caption = element_text(vjust = 1)
)

#+labs(subtitle = "")

Pathbased

# Load the Pathbased points together with their `class` labels.
pathbased_labeled <- read.table(
  "C:/Users/lucii/OneDrive/Documentos/Universidad/quinto/Erasmus_practicas/Rproject/cluster/data/pathbased_with_clust_labels.txt",
  header = TRUE
)

# Load the clustering results stored in pathbased_exact_clustering.txt.
pathbased_clusters <- read.table(
  "C:/Users/lucii/OneDrive/Documentos/Universidad/quinto/Erasmus_practicas/Rproject/cluster/data/pathbased_exact_clustering.txt",
  header = TRUE
)

# Scatter plot of the Pathbased points (`x` vs `y`), coloured by `class`
# treated as a factor.
p <- ggplot(
  pathbased_labeled,
  aes(x = x, y = y, group = as.factor(class))
) +
  geom_point(aes(color = as.factor(class))) +
  labs(x = "x", y = "y") +
  guides(size = "none") +
  theme(
    text = element_text(size = 10, family = "LM Roman 10"),
    axis.title.x = element_text(face = "italic")
  ) +
  ggtitle("Pathbased")

# Display the plot with the subtitle/caption vertical justification adjusted.
p + theme(
  plot.subtitle = element_text(vjust = 1),
  plot.caption = element_text(vjust = 1)
)

#+labs(subtitle = "")

Plots for our k

k = 2

# Pathbased points coloured by the first column of `pathbased_clusters`
# (the plot is titled k = 2).
p <- ggplot(
  pathbased_labeled,
  aes(x = x, y = y, group = as.factor(pathbased_clusters[, 1]))
) +
  geom_point(aes(color = as.factor(pathbased_clusters[, 1]))) +
  labs(x = "x", y = "y") +
  guides(size = "none") +
  theme(
    text = element_text(size = 10, family = "LM Roman 10"),
    axis.title.x = element_text(face = "italic")
  ) +
  ggtitle("Pathbased, for k = 2")

# Display the plot with the subtitle/caption vertical justification adjusted.
p + theme(
  plot.subtitle = element_text(vjust = 1),
  plot.caption = element_text(vjust = 1)
)

#+labs(subtitle = "")

k = 3

# Pathbased points coloured by the second column of `pathbased_clusters`
# (the plot is titled k = 3).
p <- ggplot(
  pathbased_labeled,
  aes(x = x, y = y, group = as.factor(pathbased_clusters[, 2]))
) +
  geom_point(aes(color = as.factor(pathbased_clusters[, 2]))) +
  labs(x = "x", y = "y") +
  guides(size = "none") +
  theme(
    text = element_text(size = 10, family = "LM Roman 10"),
    axis.title.x = element_text(face = "italic")
  ) +
  ggtitle("Pathbased, for k = 3")

# Display the plot with the subtitle/caption vertical justification adjusted.
p + theme(
  plot.subtitle = element_text(vjust = 1),
  plot.caption = element_text(vjust = 1)
)

#+labs(subtitle = "")

k = 4

# Pathbased points coloured by the third column of `pathbased_clusters`
# (the plot is titled k = 4).
p <- ggplot(
  pathbased_labeled,
  aes(x = x, y = y, group = as.factor(pathbased_clusters[, 3]))
) +
  geom_point(aes(color = as.factor(pathbased_clusters[, 3]))) +
  labs(x = "x", y = "y") +
  guides(size = "none") +
  theme(
    text = element_text(size = 10, family = "LM Roman 10"),
    axis.title.x = element_text(face = "italic")
  ) +
  ggtitle("Pathbased, for k = 4")

# Display the plot with the subtitle/caption vertical justification adjusted.
p + theme(
  plot.subtitle = element_text(vjust = 1),
  plot.caption = element_text(vjust = 1)
)

#+labs(subtitle = "")

k = 5

# Pathbased points coloured by the fourth column of `pathbased_clusters`
# (the plot is titled k = 5).
p <- ggplot(
  pathbased_labeled,
  aes(x = x, y = y, group = as.factor(pathbased_clusters[, 4]))
) +
  geom_point(aes(color = as.factor(pathbased_clusters[, 4]))) +
  labs(x = "x", y = "y") +
  guides(size = "none") +
  theme(
    text = element_text(size = 10, family = "LM Roman 10"),
    axis.title.x = element_text(face = "italic")
  ) +
  ggtitle("Pathbased, for k = 5")

# Display the plot with the subtitle/caption vertical justification adjusted.
p + theme(
  plot.subtitle = element_text(vjust = 1),
  plot.caption = element_text(vjust = 1)
)

#+labs(subtitle = "")

Blobs

# Load the Blobs table from disk (first line is used as the header).
blobs_labeled <- read.table(
  "C:/Users/lucii/OneDrive/Documentos/Universidad/quinto/Erasmus_practicas/Rproject/cluster/data/blobs_with_clust_labels.txt",
  header = TRUE
)

# Scatter plot of the Blobs points (`x` vs `y`), coloured by `class` treated
# as a factor.
p <- ggplot(
  blobs_labeled,
  aes(x = x, y = y, group = as.factor(class))
) +
  geom_point(aes(color = as.factor(class))) +
  labs(x = "x", y = "y") +
  guides(size = "none") +
  theme(
    text = element_text(size = 10, family = "LM Roman 10"),
    axis.title.x = element_text(face = "italic")
  ) +
  ggtitle("Blobs")

# Display the plot with the subtitle/caption vertical justification adjusted.
p + theme(
  plot.subtitle = element_text(vjust = 1),
  plot.caption = element_text(vjust = 1)
)

#+labs(subtitle = "")

Iris

# Load the Iris table. The first line of the file is skipped and no header
# is read, so the columns get read.table's default V1, V2, ... names.
iris_labeled <- read.table(
  "C:/Users/lucii/OneDrive/Documentos/Universidad/quinto/Erasmus_practicas/Rproject/cluster/data/iris_with_clust_labels.txt",
  header = FALSE,
  skip = 1
)

# Load the clustering results stored in iris_exact_clustering.txt.
iris_clusters <- read.table(
  "C:/Users/lucii/OneDrive/Documentos/Universidad/quinto/Erasmus_practicas/Rproject/cluster/data/iris_exact_clustering.txt",
  header = TRUE
)

# PCA on every Iris column except `V5`, centred and scaled; the component
# summary is printed right after.
pca_res <- prcomp(
  select(iris_labeled, -V5),
  center = TRUE,
  scale. = TRUE
)
summary(pca_res)
## Importance of components:
##                           PC1    PC2     PC3     PC4
## Standard deviation     1.7084 0.9560 0.38309 0.14393
## Proportion of Variance 0.7296 0.2285 0.03669 0.00518
## Cumulative Proportion  0.7296 0.9581 0.99482 1.00000
# Biplot of the Iris PCA grouped by column `V5`, with an ellipse per group.
ggbiplot(
  pca_res,
  ellipse = TRUE,
  groups = iris_labeled$V5
)

Plots for our k

k = 2

# Biplot of the Iris PCA grouped by the first column of `iris_clusters`.
ggbiplot(
  pca_res,
  ellipse = TRUE,
  groups = as.factor(iris_clusters[, 1])
)

k = 3

# Biplot of the Iris PCA grouped by the second column of `iris_clusters`.
ggbiplot(
  pca_res,
  ellipse = TRUE,
  groups = as.factor(iris_clusters[, 2])
)

k = 4

# Biplot of the Iris PCA grouped by the third column of `iris_clusters`.
ggbiplot(
  pca_res,
  ellipse = TRUE,
  groups = as.factor(iris_clusters[, 3])
)

k = 5

# Biplot of the Iris PCA grouped by the fourth column of `iris_clusters`.
ggbiplot(
  pca_res,
  ellipse = TRUE,
  groups = as.factor(iris_clusters[, 4])
)